home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Freelog 9
/
Freelog009.iso
/
BAS
/
Internet
/
Rtf2Html
/
Source C
/
RTFPARSE
/
RTFREADR.C
< prev
next >
Wrap
C/C++ Source or Header
|
1999-06-27
|
29KB
|
924 lines
/*
* %%File: rtfreadr.c
*
* Copyright (c) 1995-1999 Bertrand LE QUELLEC
* Copyright (c) 1989-1995 Microsoft Corporation.
*
* http://perso.wanadoo.fr/blq
* blq@wanadoo.fr
*/
#include <stdio.h>
#ifndef UNIX_SRC
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#endif
#include "html.h"
#include "rtftype.h"
#include "rtfdecl.h"
#include "mparse.h"
#include "rtfparse.h"
#include "rtfchar.h"
#include "rtfhtml.h"
#define SOURCE_READ 1
#include "rtfreadr.h"
static int ct = 0; /* a counter */
/*
 * %%Function: ecPrintChar
 *
 * Send a character to the output file, or to the capture buffer selected
 * by the current parser flags.
 *
 * Nothing is done while flagGO is clear or flagHeader is set.  Otherwise
 * the first matching flag decides where the character goes:
 *   flagFootnote > 0 -> Notes      (newlines dropped)
 *   flagFldrslt      -> Fldrslt
 *   flagBKMK         -> lesBkmk
 *   flagTableau      -> cellule
 *   flagPnText       -> lesCar     (tab stored as a space)
 *   niveau           -> leNiveau   (newlines dropped, tab stored as a space)
 *   flagTOC          -> lesCar
 *   none of these    -> fpOut
 *
 * On the fpOut path '<', '>' and '&' are passed through HexaToHTML unless
 * modeSpecial is set, and text starting with "http:" is accumulated in Url
 * and written through GetHrefHTML2 once a character that cannot belong to
 * the address is seen.
 *
 * Characters that do not fit in a bounded capture buffer are dropped.
 *
 * Returns ecOK in every case.
 */
int ecPrintChar(int ch)
{
    char Tab[MLEN*2];
    static int oldCh = 0;   /* previous character seen on the fpOut path */

    if (!flagGO || flagHeader)
        return ecOK;

    /* ---------------------------------------------------
     * unfortunately, we don't do a whole lot here as far
     * as layout goes...
     */
    if (flagFootnote > 0)
    {
        /* Save the footnote text */
        if (ch != '\n')
        {
            if (carnot + 1 < MLEN * 2)
            {
                Notes[carnot++] = (char)ch;
                Notes[carnot] = '\0';
            }
        }
        return ecOK;
    }
    else if (flagFldrslt)
    {
        /* NOTE(review): no capacity check here; the size of Fldrslt is
         * not visible from this file section - confirm and bound it. */
        Fldrslt[nbResult++] = (char)ch;
        Fldrslt[nbResult] = '\0';
    }
    else if (flagBKMK)
    {
        /* Same limit ecRtfParse applies when it fills lesBkmk. */
        if (nbBk + 1 < MLEN)
        {
            lesBkmk[nbBk++] = (char)ch;
            lesBkmk[nbBk] = '\0';
        }
    }
    else if (flagTableau)
    {
        /* Save the text of the current table cell */
        if (carcell + 1 < MLEN * 3)
        {
            cellule[carcell++] = (char)ch;
            cellule[carcell] = '\0';
        }
    }
    else if (flagPnText)
    {
        if (nbcar + 1 < MLEN * 2)
        {
            if (ch == '\t')
                lesCar[nbcar++] = ' ';
            else
                lesCar[nbcar++] = (char)ch;
            lesCar[nbcar] = '\0';
        }
        return ecOK;
    }
    else if (niveau)
    {
        /* Kept for the HTML rendering of the level */
        if (ch != '\n')
        {
            if (carniv + 1 < MLEN * 2)
            {
                if (ch == '\t')
                    leNiveau[carniv++] = ' ';
                else
                    leNiveau[carniv++] = (char)ch;
                leNiveau[carniv] = '\0';
            }
        }
        return ecOK;
    }
    else if (flagTOC)
    {
        /* Same limit as the flagPnText branch, which shares lesCar. */
        if (nbcar + 1 < MLEN * 2)
        {
            lesCar[nbcar++] = (char)ch;
            lesCar[nbcar] = '\0';
        }
    }
    else
    {
        if (flagDD == 0 && tagDD == 1)
        {
            tagDD = 0;
            libPrintString(TAG_BR, fpOut);
        }
        if (!modeSpecial && (ch == '<' || ch == '>' || ch == '&'))
        {
            sprintf(Tab, "%c", (char)ch);
            fprintf(fpOut, "%s", HexaToHTML(Tab));
            return ecOK;
        }
        else
        {
            /* NOTE(review): Url is written without a capacity check in the
             * branches below; its size is not visible from this section. */
            if (nbUrl == 0 && ch == 'h') /* 1 */
            {
                /* Possibly the start of a web address */
                Url[nbUrl++] = (char)ch;
                Url[nbUrl] = '\0';
                flagStop = 0;
            }
            else if (nbUrl == 1 && oldCh == 'h' && ch == ' ')
            {
                /* A lone 'h' followed by a space: emit both as text */
                putc('h', fpOut);
                putc(ch, fpOut);
                flagStop = 0;
                nbUrl = 0;
                Url[nbUrl] = '\0';
            }
            else if (nbUrl == 1 && oldCh == 'h' && ch == 't') /* 2 */
            {
                /* More likely... */
                Url[nbUrl++] = (char)ch;
                Url[nbUrl] = '\0';
                flagStop = 0;
            }
            else if (nbUrl == 2 && oldCh == 't' && ch == 't') /* 3 */
            {
                /* More likely still... */
                Url[nbUrl++] = (char)ch;
                Url[nbUrl] = '\0';
                flagStop = 0;
            }
            else if (nbUrl == 3 && oldCh == 't' && ch == 'p') /* 4 */
            {
                /* Almost certain... */
                Url[nbUrl++] = (char)ch;
                Url[nbUrl] = '\0';
                flagStop = 0;
            }
            else if (nbUrl == 4 && oldCh == 'p' && ch == ':') /* 5 */
            {
                /* "http:" recognised; oldCh == -10 marks "inside an address" */
                Url[nbUrl++] = (char)ch;
                Url[nbUrl] = '\0';
                oldCh = -10;
                flagStop = 0;
                return ecOK;
            }
            else
            {
                /* The cast keeps the <ctype.h> argument within the range
                 * the C standard requires (unsigned char or EOF). */
                if (flagStop || (ch != '/' && ch != ':' && ch != '#' && ch != '~'
                                 && ch != '.' && ch != '-' && ch != '_'
                                 && !isalnum((unsigned char)ch)))
                {
                    if (strlen(Url) > 4)
                    {
                        libPrintString(GetHrefHTML2(Url, Url), fpOut);
                    }
                    flagStop = 0;
                    nbUrl = 0;
                    oldCh = 0;
                    Url[nbUrl] = '\0';
                }
                if (nbUrl && oldCh == -10)
                {
                    Url[nbUrl++] = (char)ch;
                    Url[nbUrl] = '\0';
                    return ecOK;
                }
                else
                {
                    if (strlen(Url))
                    {
                        /* Not an address after all: emit the buffered text */
                        libPrintString(Url, fpOut);
                    }
                    flagStop = 0;
                    nbUrl = 0;
                    Url[nbUrl] = '\0';
                }
                putc(ch, fpOut);
            }
            oldCh = ch;
        }
    }
    return ecOK;
}
/*
 * %%Function: ecParseChar
 *
 * Route the character to the appropriate destination stream.
 *
 * While in the binary state (ris == risBin) the remaining byte count
 * cbBin is decremented first, and the state returns to risNorm once the
 * count reaches zero or below.  Only the rdsNorm destination produces
 * output (through ecPrintChar); every other destination, including
 * rdsSkip, discards the character and yields ecOK.
 */
int ecParseChar(int ch)
{
    if (ris == risBin)
    {
        cbBin--;
        if (cbBin <= 0)
            ris = risNorm;
    }

    /* Properties are valid at this point. */
    if (rds == rdsNorm)
        return ecPrintChar(ch);

    /* rdsSkip and all other destinations: toss this character. */
    return ecOK;
}
/*
* %%Function: ecPushRtfState
*
* Save relevant info on a linked list of SAVE structures.
*/
static int ecPushRtfState(void)
{
#ifdef UNIX_SRC
pSAVE psaveNew = (pSAVE)calloc(1, sizeof(SAVE));
if (!psaveNew)
return ecStackOverflow;
#else
HGLOBAL HGLB = GlobalAlloc(GPTR, sizeof(SAVE));
pSAVE psaveNew = (pSAVE)GlobalLock(HGLB);
if(!HGLB)
return ecNO;
if(!psaveNew)
return ecNO;
psaveNew->phandle = HGLB;
#endif
psaveNew->pNext = psave;
psaveNew->chp = chp;
psaveNew->pap = pap;
psaveNew->sep = sep;
psaveNew->dop = dop;
psaveNew->rds = rds;
psaveNew->ris = ris;
psave = psaveNew;
ris = risNorm;
cGroup++;
return ecOK;
}
/*
* %%Function: ecPopRtfState
*
* If we're ending a destination (i.e., the destination is changing),
* call ecEndGroupAction.
* Always restore relevant info from the top of the SAVE list.
*/
static int ecPopRtfState(void)
{
int ec = 0;
pSAVE psaveOld = (pSAVE)0;
if (!psave)
return ecStackUnderflow;
if (rds != psave->rds)
{
if ((ec = ecEndGroupAction(rds)) != ecOK)
return ec;
}
chp = psave->chp;
pap = psave->pap;
sep = psave->sep;
dop = psave->dop;
rds = psave->rds;
ris = psave->ris;
psaveOld = psave;
psave = psave->pNext;
#ifdef UNIX_SRC
free(psaveOld);
#else
if(psaveOld->phandle)
{
GlobalUnlock(psaveOld->phandle);
GlobalFree(psaveOld->phandle);
}
#endif
cGroup--;
return ecOK;
}
/*
 * %%Function: ecParseRtfKeyword
 *
 * Step 2 :
 * get a control word (and its associated value) and
 * call ecTranslateKeyword to dispatch the control.
 *
 * Called after a backslash has been read from fp.  Three forms are
 * recognised:
 *   - a control symbol (any non-letter other than a quote): dispatched
 *     immediately as a one-character keyword with no parameter;
 *   - the quote form \'XY: the keyword is the quote followed by X; Y is
 *     passed as the parameter (its numeric value when Y is a digit, its
 *     character code otherwise);
 *   - a control word: a run of letters, optionally followed by '-' and a
 *     run of digits.
 *
 * Letters or digits beyond the capacity of the local buffers are consumed
 * from the stream but not stored, so overlong input is truncated instead
 * of overrunning the stack.  The global lParam receives the long form of
 * a numeric parameter.  For a control word, a delimiter other than a
 * space is pushed back on fp.
 *
 * Returns ecEndOfFile when the stream ends inside the keyword, otherwise
 * whatever ecTranslateKeyword returns.
 */
static int ecParseRtfKeyword(FILE * fp)
{
    int ch = 0;
    char fParam = fFalse;
    char fNeg = fFalse;
    int param = 0;
    int flag = 0;           /* set for the \'XY form */
    char * pch = (char *)0;
    char szKeyword [30];
    char szParameter [20];

    szKeyword[0] = '\0';
    szParameter[0] = '\0';

    if ((ch = getc(fp)) == EOF)
        return ecEndOfFile;

    /* a control symbol ; no delimiter. */
    if (!isalpha(ch) && ch != '\'')
    {
        szKeyword[0] = (char) ch;
        szKeyword[1] = '\0';
        return ecTranslateKeyword((char *)szKeyword, 0, fParam);
    }

    /* Special character : accent */
    if (ch == '\'')
    {
        /* Accented characters are stored as two hexadecimal characters
         * following the RTF code \'
         */
        flag = 1;
        /* Record the quote itself as the start of the keyword */
        szKeyword[0] = (char) ch;
        if ((ch = getc(fp)) == EOF)
            return ecEndOfFile;
        szKeyword[1] = (char) ch;
        /* If the second hex character is a digit it is picked up further
         * down and converted into param; otherwise its character code is
         * stored in param here.
         */
        if ((ch = getc(fp)) == EOF)
            return ecEndOfFile;
        szKeyword[2] = '\0';
        if (!isdigit(ch))
        {
            fParam = fFalse;
            param = ch;
        }
    }
    else
    {
        pch = (char *)szKeyword;
        for (; isalpha(ch); ch = getc(fp))
        {
            /* Keep one byte for the terminator; drop the excess. */
            if (pch < szKeyword + sizeof(szKeyword) - 1)
                *pch++ = (char) ch;
        }
        *pch = '\0';
    }

    if (ch == '-')
    {
        fNeg = fTrue;
        if ((ch = getc(fp)) == EOF)
            return ecEndOfFile;
    }

    if (isdigit(ch))
    {
        /* a digit after the control means we have a parameter */
        fParam = fTrue;
        pch = (char *)szParameter;
        if (!flag)
        {
            for (; isdigit(ch); ch = getc(fp))
            {
                /* Keep one byte for the terminator; drop the excess. */
                if (pch < szParameter + sizeof(szParameter) - 1)
                    *pch++ = (char) ch;
            }
        }
        else
        {
            /* Special character form: take this single digit only. */
            *pch++ = (char) ch;
        }
        *pch = '\0';

        param = atoi(szParameter);
        lParam = atol(szParameter);
        if (fNeg)
        {
            /* Negate each value exactly once. */
            param = -param;
            lParam = -lParam;
        }
    }

    /* No need to push the delimiter back for the special character form */
    if (ch != ' ' && !flag)
        ungetc(ch, fp);

    return ecTranslateKeyword(szKeyword, param, fParam);
}
/*
 * Append a NUL-terminated string to the current table cell (cellule),
 * stopping as soon as the cell buffer is full.
 */
static void rdrAppendToCell(const char * text)
{
    while (*text != '\0' && carcell + 1 < MLEN * 3)
    {
        cellule[carcell++] = *text++;
        cellule[carcell] = '\0';
    }
}

/*
 * Close a bookmark group: emit the anchor built from lesBkmk (into the
 * current table cell when flagTableau is set, otherwise to fpOut) unless
 * the name is empty or starts with "_Toc", then reset the bookmark state.
 */
static void rdrCloseBookmark(void)
{
    char html[MLEN * 3];    /* same capacity as the cell buffer */

    if (lesBkmk[0] != '\0' && strncmp(lesBkmk, "_Toc", 4) != 0)
    {
        if (flagTableau)
        {
            /* The precision bounds the copy to the local buffer. */
            sprintf(html, "%.*s", (int)sizeof(html) - 1, GetANameHTML2(lesBkmk));
            rdrAppendToCell(html);
        }
        else
            libPrintString(GetANameHTML2(lesBkmk), fpOut);
    }
    flagBKMK = 0;
    nbBk = 0;
    lesBkmk[nbBk] = '\0';
}

/*
 * Close a field-result group: emit the collected result text, wrapped in
 * an internal link when a bookmark name is pending (into the current
 * table cell when flagTableau is set, otherwise to fpOut), then reset the
 * field-result and bookmark state.
 */
static void rdrCloseFieldResult(void)
{
    char html[MLEN * 3];    /* same capacity as the cell buffer */

    if (Fldrslt[0] != '\0')
    {
        if (lesBkmk[0] != '\0')
        {
            if (flagTableau)
            {
                /* The precision bounds the copy to the local buffer. */
                sprintf(html, "%.*s", (int)sizeof(html) - 1,
                        GetAHNameHTML2(lesBkmk, Fldrslt));
                rdrAppendToCell(html);
            }
            else
                libPrintString(GetAHNameHTML2(lesBkmk, Fldrslt), fpOut);
        }
        else
        {
            if (flagTableau)
                rdrAppendToCell(Fldrslt);
            else
                libPrintString(Fldrslt, fpOut);
        }
    }
    flagFldrslt = 0;
    nbResult = nbBk = 0;
    Fldrslt[nbResult] = lesBkmk[nbBk] = '\0';
}

/*
 * Write the HTML form of a SYMBOL field character code to fpOut.
 * Code 183 opens a list item instead (outside tables, and only when no
 * item is already open).  Unlisted codes produce no output.
 */
static void rdrPrintSymbol(int code)
{
    switch (code)
    {
    case 35:
        libPrintString(TokenToHTML("numbersign"), fpOut);
        break;
    case 37:
        libPrintString(TokenToHTML("percent"), fpOut);
        break;
    case 38:
        libPrintString(TokenToHTML("ampersand"), fpOut);
        break;
    case 95:
        libPrintString(TokenToHTML("underscore"), fpOut);
        break;
    case 97:
        libPrintString(TokenToHTML("alpha"), fpOut);
        break;
    case 98:
        libPrintString(TokenToHTML("beta"), fpOut);
        break;
    case 171:
        libPrintString(TokenToHTML("arrowboth"), fpOut);
        break;
    case 172:
        libPrintString(TokenToHTML("arrowleft"), fpOut);
        break;
    case 174:
        libPrintString(TokenToHTML("arrowright"), fpOut);
        break;
    case 176:
        libPrintString(TokenToHTML("degree"), fpOut);
        break;
    case 177:
        libPrintString(TokenToHTML("plusminus"), fpOut);
        break;
    case 179:
        libPrintString(TokenToHTML("greaterequal"), fpOut);
        break;
    case 183:
        if (!flagTableau)
        {
            if (!tagLI)
            {
                tagLI = 1;
                libPrintCharStd('\n', fpOut);
                libPrintString(TAG_BQUOTE, fpOut);
                libPrintString(TAG_LI, fpOut);
            }
        }
        break;
    case 184:
        libPrintString(TokenToHTML("mathdivise"), fpOut);
        break;
    case 185:
        libPrintString(TokenToHTML("notequal"), fpOut);
        break;
    case 187:
        libPrintString(TokenToHTML("tilde"), fpOut);
        break;
    case 188:
        libPrintString(TokenToHTML("ellipsis"), fpOut);
        break;
    case 210:
        libPrintString(TokenToHTML("registred"), fpOut);
        break;
    case 211:
        libPrintString(TokenToHTML("copyright"), fpOut);
        break;
    case 212:
    case 228:
        libPrintString(TokenToHTML("trademark"), fpOut);
        break;
    case 219:
        libPrintString(TokenToHTML("arrowdblboth"), fpOut);
        break;
    case 220:
        libPrintString(TokenToHTML("arrowdblleft"), fpOut);
        break;
    case 222:
        libPrintString(TokenToHTML("arrowdblright"), fpOut);
        break;
    default:
        break;
    }
}

/*
 * %%Function: ecRtfParse
 *
 * Step 1:
 * Isolate RTF keywords and send them to ecParseRtfKeyword;
 * Push and pop state at the start and end of RTF groups;
 * Send text to ecParseChar for further processing.
 *
 * Inside a group (crochet != 0) plain text is also scanned for the field
 * words SYMBOL/symbol, TOC and REF, which drive flagTOC, flagREF and the
 * symbol output.
 *
 * In the hexadecimal state two hex digits are combined into one byte,
 * which is then sent to ecParseChar.
 *
 * Returns ecOK on success; ecStackUnderflow when cGroup is negative;
 * ecUnmatchedBrace when groups are still open at end of file;
 * ecAssertion or ecInvalidHex for a bad internal state or hex digit; or
 * the first error returned by ecPushRtfState, ecPopRtfState,
 * ecParseRtfKeyword or ecParseChar.
 */
int ecRtfParse(FILE * fp)
{
    int ch = 0;
    int oldCh = 0;          /* character read on the previous iteration */
    int ec = 0;
    int cNibble = 2;        /* hex digits still expected for the current byte */
    int b = 0;              /* byte being assembled from hex digits */
    int nbtok = 0;
    int par = 0;            /* brace depth counted while inside a footnote */
    int flagTok = 0;        /* set once SYMBOL/symbol has been seen */
    int i = 0;
    char token[MLEN * 2];

    while ((ch = getc(fp)) != EOF)
    {
        if (cGroup < 0)
            return ecStackUnderflow;

        /* if we're parsing binary data, handle it directly */
        if (ris == risBin)
        {
            if ((ec = ecParseChar(ch)) != ecOK)
                return ec;
        }
        else
        {
            switch (ch)
            {
            case '{':
                if (flagFootnote > 0)
                    par += 1;
                if (crochet < 0)
                    crochet = 0;
                crochet += 1;
                if ((ec = ecPushRtfState()) != ecOK)
                    return ec;
                break;

            case '}':
                if (flagBKMK)
                {
                    /* Anchor target reached: insert a destination point */
                    rdrCloseBookmark();
                }
                else if (flagFldrslt)
                {
                    /* Cross-reference: insert an internal link start point */
                    rdrCloseFieldResult();
                }
                if (flagFldrslt)
                {
                    flagFldrslt = 0;
                }
                if (flagFootnote > 0)
                    par -= 1;
                if (par < 0)
                {
                    par = 0;
                    flagFootnote = -1;
                }
                /* Close any style that is still open */
                ecPrintItalicF (fpOut);
                ecPrintBoldF (fpOut);
                if ((ec = ecPopRtfState()) != ecOK)
                    return ec;
                if (valCrochetH == crochet)
                    flagHeader = 0;
                if (valCrochetF == crochet)
                    valCrochetF = 0;
                flagREF = 0;
                crochet -= 1;
                break;

            case '\\':
                if ((ec = ecParseRtfKeyword(fp)) != ecOK)
                    return ec;
                break;

            case 0x0d:
            case 0x0a: /* cr and lf are noise characters... */
                break;

            default:
                if (crochet)
                {
                    if (ch == ' ')
                    {
                        /* A space ends the word collected so far. */
                        token[nbtok] = '\0';
                        if (!(strcmp(token, "SYMBOL")) || !(strcmp(token, "symbol")))
                        {
                            flagTok = 1;
                        }
                        else if (!(strcmp(token, "TOC")))
                        {
                            flagTok = 0;
                            flagTOC = 1;
                            ecPrintItalicF (fpOut);
                        }
                        else if (!(strcmp(lesBkmk, "REF")))
                        {
                            flagREF = 1;
                            nbBk = 0;
                            lesBkmk[nbBk] = '\0';
                        }
                        else
                        {
                            if (flagREF && nbBk)
                            {
                                flagREF = 0;
                                nbBk = 0;
                            }
                            else if (flagTok)
                            {
                                flagTok = 0;
                                rdrPrintSymbol(atoi(token));
                            }
                        }
                        nbtok = 0;
                        token[nbtok] = '\0';
                    }
                    else
                    {
                        if (nbtok + 1 < MLEN * 2 && (nbtok || flagTok))
                            token[nbtok++] = (char)ch;
                        if (nbBk + 1 < MLEN && (nbBk || flagREF) && !flagBKMK)
                        {
                            lesBkmk[nbBk++] = (char)ch;
                            lesBkmk[nbBk] = '\0';
                        }
                        /* Symbol - symbol - TOK - REF */
                        if (ch == 's' || ch == 'S' || ch == 'T')
                        {
                            nbtok = 0;
                            token[nbtok++] = (char)ch;
                        }
                        else if (ch == 'R' && oldCh == ' ')
                        {
                            nbBk = 0;
                            lesBkmk[nbBk++] = (char)ch;
                            lesBkmk[nbBk] = '\0';
                        }
                    }
                }

                if (ris == risNorm)
                {
                    if ((ec = ecParseChar(ch)) != ecOK)
                        return ec;
                }
                else
                {
                    /* parsing hex data */
                    if (ris != risHex)
                        return ecAssertion;
                    b = b << 4;
                    if (isdigit(ch))
                        b += ch - '0';
                    else if (islower(ch))
                    {
                        if (ch < 'a' || ch > 'f')
                            return ecInvalidHex;
                        b += ch - 'a' + 10;     /* 'a'..'f' stand for 10..15 */
                    }
                    else
                    {
                        if (ch < 'A' || ch > 'F')
                            return ecInvalidHex;
                        b += ch - 'A' + 10;     /* 'A'..'F' stand for 10..15 */
                    }
                    cNibble--;
                    if (!cNibble)
                    {
                        /* Both digits read: forward the assembled byte. */
                        if ((ec = ecParseChar(b)) != ecOK)
                            return ec;
                        cNibble = 2;
                        b = 0;
                        ris = risNorm;
                    }
                } /* end else (ris != risNorm) */
                break;
            } /* switch */
        } /* else (ris != risBin) */
        oldCh = ch;
    } /* while */

    ecDetectPard();
    ecFootnote();

    if (cGroup < 0)
        return ecStackUnderflow;
    if (cGroup > 0)
        return ecUnmatchedBrace;

    if (flagFRAME)
    {
        libPrintCharStd('\n', fpTable);
        for (i = 0; i <= oldNiveau; i++)
            libPrintString(TAGf_UL, fpTable);
    }
    return ecOK;
}